pack <- c('parallel', 'jsonlite', 'httr', 'caret', 'rdd', 'rdrobust', 'xlsx', 'Rfast',"readxl", "sqldf", "lubridate", "quanteda","rvest", "randomForest", "SnowballC", "tidytext", "topicmodels", "RTextTools", "tm", "zoo", "gdata", "readr", "purrr","stringdist" , "stringi", "data.table", "jsonlite", "XML", "httr", "ggrepel", "classInt", "MASS", "tile", "simcf","sp" ,"plyr", "dplyr", "stringr", "ggplot2", "lfe", "stargazer")
lapply(pack, require, character.only=T); rm(pack)

#########################################################################################
# http://api.duma.gov.ru/pages/dokumentatsiya/obrashchenie-k-api - webpage vs. application api
# Encoding: https://rstudio-pubs-static.s3.amazonaws.com/279354_f552c4c41852439f910ad620763960b6.html
# APPLICATION KEY
# API credentials. `api_key` is pasted into the URL path of every request in
# this script and `app_key` is sent as the `app_token` query parameter.
# Both are read from environment variables so that the secrets do not have to
# be written into this file. When a variable is not set the value falls back
# to "" (the previous hard-coded value); in that case either export the
# variable or replace the fallback with your own key before running.
api_key <- Sys.getenv("DUMA_API_KEY", unset = "")
app_key <- Sys.getenv("DUMA_APP_KEY", unset = "")
# Sequence from `from` to `to` in steps of `by` that is guaranteed to end in
# `to`: when the regular sequence stops short of `to`, `to` is appended as an
# extra final element.
seqlast <- function(from, to, by) {
  steps <- seq(from, to, by)
  reaches_end <- steps[length(steps)] == to
  if (reaches_end) steps else c(steps, to)
}

##########################################################################################


###################################################################################################
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 
# - - - - - - -         API CALLS                         - - - - - - - - - - - - - 
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 
###################################################################################################
###################################################################################################

###################################################################################################
# CONVOCATIONS
###################################################################################################

# Download the list of convocations (periods) and parse the JSON reply.
table_json <- GET(paste0(
  'http://api.duma.gov.ru/api/', api_key,
  '/periods.json?app_token=', app_key
))
# Fail immediately on an HTTP error instead of parsing an error body as data.
stop_for_status(table_json)
# Decode the body explicitly as UTF-8: rawToChar() leaves the string without an
# encoding mark, which can garble Cyrillic text on non-UTF-8 locales.
convocation <- fromJSON(content(table_json, as = "text", encoding = "UTF-8"))

###################################################################################################
# Deputies
###################################################################################################
# Download the table of deputies and parse the JSON reply.
loop3 <- GET(paste0(
  "http://api.duma.gov.ru/api/", api_key,
  "/deputies.json?app_token=", app_key
))
# Fail immediately on an HTTP error instead of parsing an error body as data.
stop_for_status(loop3)
# Decode the body explicitly as UTF-8: rawToChar() leaves the string without an
# encoding mark, which can garble Cyrillic text on non-UTF-8 locales.
deputy.list <- fromJSON(content(loop3, as = "text", encoding = "UTF-8"))
rm(loop3)
# Store `factions` as text.
# NOTE(review): presumably a nested (list) column in the parsed reply, which
# as.character() turns into one string per deputy - confirm against the API.
deputy.list$factions <- as.character(deputy.list$factions)

###################################################################################################
# QUESTION CODE:    
###################################################################################################
# Ask the questions endpoint once, without a page number, to learn how many
# records exist in total (`totalCount`).
count <- GET(paste0(
  "http://api.duma.gov.ru/api/", api_key,
  "/questions.json?app_token=", app_key
))
stop_for_status(count)
# Decode explicitly as UTF-8: rawToChar() leaves the string without an encoding
# mark, which can garble Cyrillic text on non-UTF-8 locales.
count <- fromJSON(content(count, as = "text", encoding = "UTF-8"))

# `pages` is kept exactly as before in case later code reads it, but it is no
# longer used to size the loop: seqlast(1, N, 20) usually has one element more
# than there are pages of 20 records, so the old loop requested a page past
# the end.
pages <- seqlast(1, count$totalCount, 20)
# NOTE(review): assumes the API returns 20 records per page, as the original
# step size of 20 implies - confirm against the API documentation.
n_pages <- ceiling(count$totalCount / 20)
list_object <- vector("list", n_pages)

start <- Sys.time()
for (i in seq_len(n_pages)) {
  loop <- GET(paste0(
    "http://api.duma.gov.ru/api/", api_key,
    "/questions.json?app_token=", app_key, "&page=", i
  ))
  # `[i] <- list(...)` keeps the slot even when the parsed reply is NULL,
  # whereas `[[i]] <- NULL` would delete it and shift later pages.
  list_object[i] <- list(
    fromJSON(content(loop, as = "text", encoding = "UTF-8"))
  )
}

# The fourth element of every parsed page holds the records themselves.
# NOTE(review): positional access; verify the element order of the reply.
dta.list <- lapply(list_object, "[[", 4)
speeches_overview <- rbindlist(dta.list, fill = TRUE)


###################################################################################################
####################################################################################################
# Fetch the detail record of every deputy listed in `deputy.list$id`.
# lapply() returns one slot per id (NULL when the parsed reply is empty) and
# does nothing when there are no ids, unlike a `1:length()` loop.
deputyinfo.list <- lapply(deputy.list$id, function(deputy_id) {
  reply <- GET(paste0(
    "http://api.duma.gov.ru/api/", api_key,
    "/deputy.json?app_token=", app_key, "&id=", deputy_id
  ))
  # Decode explicitly as UTF-8: rawToChar() leaves the string without an
  # encoding mark, which can garble Cyrillic text on non-UTF-8 locales.
  fromJSON(content(reply, as = "text", encoding = "UTF-8"))
})

# Drop empty replies, flatten each record into a one-row data frame and stack
# the rows; `fill = TRUE` pads fields that are missing for some deputies.
deputyinfo <- deputyinfo.list[lengths(deputyinfo.list) > 0]
deputyinfo <- lapply(deputyinfo, function(x) as.data.frame(t(unlist(x))))
deputyinfo <- rbindlist(deputyinfo, fill = TRUE)


###################################################################################################
###################################################################################################
# TRANSCRIPT
# Download the transcript of every question in `speeches_overview`, addressed
# by its `kodz` / `kodvopr` pair. The list is preallocated so that it always
# has exactly one slot per row of `speeches_overview`.
transcript.list <- vector("list", nrow(speeches_overview))

for (i in seq_len(nrow(speeches_overview))) {
  loop2 <- GET(paste0(
    "http://api.duma.gov.ru/api/", api_key, "/transcriptQuestion/",
    speeches_overview$kodz[i], "/", speeches_overview$kodvopr[i],
    ".json?app_token=", app_key
  ))
  # `[i] <- list(...)` keeps the slot even when the parsed reply is NULL;
  # `[[i]] <- NULL` would drop it and break the row-by-row alignment with
  # `speeches_overview`.
  # Decode explicitly as UTF-8: rawToChar() leaves the string without an
  # encoding mark, which can garble Cyrillic text on non-UTF-8 locales.
  transcript.list[i] <- list(
    fromJSON(content(loop2, as = "text", encoding = "UTF-8"))
  )
  print(i) # progress indicator
}

# 'lines' subelement isolated
# Split every downloaded transcript into its text and its metadata, then bind
# both into one table, `speeches_dta`, with one row per transcript.
transcript.dta <- vector("list", length(transcript.list))
for (i in seq_along(transcript.list)) {
  # Collapse the lines of the first part of the first question into one string.
  transcript.dta[[i]] <- paste(
    transcript.list[[i]]$meetings$questions[[1]]$parts[[1]]$lines[[1]],
    collapse = " "
  )
  # The lines are stored above; remove them so that only metadata is left.
  transcript.list[[i]]$meetings$questions[[1]]$parts[[1]]$lines <- NULL
}

# All remaining sub-elements, flattened to one named vector per transcript.
meta.dta <- lapply(transcript.list, unlist, use.names = TRUE)
# NOTE(review): not used anywhere in this section; kept in case later code
# expects the object to exist.
meta.dta1 <- list()

# Turn one flattened record into a one-row table. Empty records (NULL) become
# a single NA cell instead of being dropped: transcripts 33529, 35740, 35903
# and 42068 are NULL, and removing them (for example with
# Filter(Negate(function(x) is.null(unlist(x))), meta.dta)) would leave the
# metadata four rows short, with no easy way to merge it with the texts.
fct <- function(x) {
  if (!is.null(x)) {
    data.table(t(x))
  } else {
    data.frame(NA)
  }
}

meta.dta <- lapply(meta.dta, fct)
meta.dta <- rbindlist(meta.dta, fill = TRUE)
# Strip the nesting prefixes that unlist() put in front of the field names.
# The patterns are regular expressions, so the trailing "." matches any single
# character, not only a literal dot.
# NOTE(review): left as is on purpose - numbered prefixes may rely on this.
names(meta.dta) <- gsub("meetings.", "", names(meta.dta))
names(meta.dta) <- gsub("questions.", "", names(meta.dta))
names(meta.dta) <- gsub("parts.", "", names(meta.dta))

transcript.dta <- lapply(transcript.dta, fct)
transcript.dta <- rbindlist(transcript.dta, fill = TRUE)

# Text first, metadata after it; the intermediate objects are removed.
speeches_dta <- cbind(transcript.dta, meta.dta)
rm(meta.dta, transcript.dta, transcript.list)
names(speeches_dta)[1] <- "transcript"


###################################################################################################
###################################################################################################
# VOTE   
###################################################################################################

####################################################################################
####################################################################################

# Page through the vote search (100 votes per request, pages 1 to 700, votes
# up to 2018-01-01) and stack all pages into `votes_overview`.
#
# The pages are collected in a list and bound once at the end. The previous
# `rbind(votes_overview, law$votes, fill = T)` worked on plain data frames,
# where base rbind() has no `fill` option and instead binds `fill` as one more
# row, so every page appended a junk row of TRUE values. It also copied the
# growing table on every iteration.
# `votes_overview` stays NULL when no page contains votes; otherwise it is now
# a data.table.
votes_overview <- NULL
vote_pages <- vector("list", 700L)

for (i in 1:700) {
  law_json <- GET(paste0(
    'http://api.duma.gov.ru/api/', api_key,
    '/voteSearch.json?app_token=', app_key,
    '&limit=100&page=', i, '&to=2018-01-01 00:00:00'
  ))
  # Decode explicitly as UTF-8: rawToChar() leaves the string without an
  # encoding mark, which can garble Cyrillic text on non-UTF-8 locales.
  law <- fromJSON(content(law_json, as = "text", encoding = "UTF-8"))
  # Only pages that actually carry a table of votes are kept.
  if (is.data.frame(law$votes) && nrow(law$votes) > 0) {
    law$votes$iteration <- i # remember which page each vote came from
    vote_pages[[i]] <- law$votes
  }
}

vote_pages <- Filter(Negate(is.null), vote_pages)
if (length(vote_pages) > 0) {
  # `fill = TRUE` pads columns that are missing on some pages with NA.
  votes_overview <- rbindlist(vote_pages, fill = TRUE)
}

# http://api.duma.gov.ru/pages/dokumentatsiya/rezultati-zakonodatelnoy-deyatelnosti
# lawProgram

# Ids of all sessions listed in the convocation table.
session_id <- unlist(lapply(convocation$sessions, function(x) unlist(x$id)))
empty <- list()

# Replace the nested column `column` of `tbl` by the columns of `nested`,
# renamed to "<prefix>_<name>" and appended at the end of the table.
# `tbl` is returned unchanged when `nested` is NULL (field absent in the reply).
flatten_nested_column <- function(tbl, nested, column, prefix) {
  if (is.null(nested)) {
    return(tbl)
  }
  names(nested) <- paste(prefix, names(nested), sep = '_')
  tbl <- cbind(tbl, nested)
  tbl[[column]] <- NULL
  tbl
}

# Download the law program of every session and flatten its nested columns so
# that the per-session tables can be stacked afterwards.
votes_list <- vector("list", length(session_id))

for (i in seq_along(session_id)) {
  law_json <- GET(paste0(
    'http://api.duma.gov.ru/api/', api_key,
    '/lawProgram/', session_id[i], '.json?app_token=', app_key
  ))
  # Decode explicitly as UTF-8: rawToChar() leaves the string without an
  # encoding mark, which can garble Cyrillic text on non-UTF-8 locales.
  program <- fromJSON(content(law_json, as = "text", encoding = "UTF-8"))

  # Only tables are flattened; an empty reply (an empty list) is stored as is.
  if (is.data.frame(program)) {
    program <- flatten_nested_column(
      program, program$committees$responsible, 'committees', 'committees'
    )
    program <- flatten_nested_column(
      program, program$topic, 'topic', 'topic'
    )
    program <- flatten_nested_column(
      program, program$considerDepartment, 'considerDepartment', 'department'
    )
    # Drop the `subject` column; `drop = FALSE` keeps the result a data frame.
    program <- program[, names(program) != 'subject', drop = FALSE]
  }

  # `[i] <- list(...)` keeps the slot even when the parsed reply is NULL.
  votes_list[i] <- list(program)
}

# Stack the per-session law programs into one table. This replaces whatever
# `votes_overview` held before, i.e. the result of the vote search loop.
votes_overview <- rbindlist(votes_list)

# Sanity checks: number of distinct ids, registration numbers and names.
votes_overview[, list(uniqueN(id), uniqueN(registrationNumber), uniqueN(name))]
# Year of introduction = everything before the first "-" of `introduceDate`.
votes_overview[, year := as.numeric(gsub('-.*', '', introduceDate))]
votes_overview[, uniqueN(name), by = 'year']
# `voting_w_MV` is not created anywhere in this part of the script, so the
# comparison only runs when the object already exists in the session.
if (exists("voting_w_MV")) {
  voting_w_MV[, uniqueN(lawNumber), by = 'year']
}

# Keep only the columns that are not lists. The test yields exactly one value
# per column, whereas unlist(lapply(., class)) yields several for a column
# with more than one class and then no longer lines up with the columns.
keep_col <- !vapply(
  votes_overview,
  function(col) "list" %in% class(col),
  logical(1)
)
votes_overview <- votes_overview[, .SD, .SDcols = keep_col]


